library(SingleCellExperiment)
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
library(scran)
## Loading required package: scuttle
library(scater)
## Loading required package: ggplot2
sce <- readRDS("../data/SC2018/231113_sceAfterDoubletRemoval.rds")
rownames(sce) <- rowData(sce)$geneSymbol
# T-cell markers
plotReducedDim(sce,
dimred = "UMAP_fastMNNCorrected",
point_size=1/2,
colour_by="CD3D")

plotReducedDim(sce,
dimred = "UMAP_fastMNNCorrected",
point_size=1/2,
colour_by="CD3E")

plotReducedDim(sce,
dimred = "UMAP_fastMNNCorrected",
point_size=1/2,
colour_by="CD3G")

plotReducedDim(sce,
dimred = "UMAP_fastMNNCorrected",
point_size=1/2,
colour_by="TRAC")

# B-cell markers
plotReducedDim(sce,
dimred = "UMAP_fastMNNCorrected",
point_size=1/2,
colour_by="MS4A1")

plotReducedDim(sce,
dimred = "UMAP_fastMNNCorrected",
point_size=1/2,
colour_by="CD19")

# NK cell markers
plotReducedDim(sce,
dimred = "UMAP_fastMNNCorrected",
point_size=1/2,
colour_by="KLRF1")

# CD14 monocytes
plotReducedDim(sce,
dimred = "UMAP_fastMNNCorrected",
point_size=1/2,
colour_by="CD14")

# CD16 monocytes
plotReducedDim(sce,
dimred = "UMAP_fastMNNCorrected",
point_size=1/2,
colour_by="FCGR3A")

# erythrocytes
plotReducedDim(sce,
dimred = "UMAP_fastMNNCorrected",
point_size=1/2,
colour_by="HBA1")

## three smaller clusters
## Dendritic cells: no longer present, it seems
## possibly removed due to doublet calling
plotReducedDim(sce,
dimred = "UMAP_fastMNNCorrected",
point_size=1/2,
colour_by="LILRA4")

plotReducedDim(sce,
dimred = "UMAP_fastMNNCorrected",
point_size=1/2,
colour_by="CLEC4C")

## MKI67+ proliferating cells: also likely no longer present
plotReducedDim(sce,
dimred = "UMAP_fastMNNCorrected",
point_size=1/2,
colour_by="MKI67")

plotReducedDim(sce,
dimred = "UMAP_fastMNNCorrected",
point_size=1/2,
colour_by="CDK1")

## megakaryocytes
plotReducedDim(sce,
dimred = "UMAP_fastMNNCorrected",
point_size=1/2,
colour_by="ITGB3")

plotReducedDim(sce,
dimred = "UMAP_fastMNNCorrected",
point_size=1/2,
colour_by="CMTM5")

set.seed(464688)
# Build a shared nearest-neighbor graph from PCA space
graph <- buildSNNGraph(sce,
use.dimred = 'PCA')
# Leiden clustering on the SNN graph
cluster_leiden <- factor(igraph::cluster_leiden(graph = graph,
resolution_parameter = 0.008)$membership)
nlevels(cluster_leiden)
## [1] 13
colData(sce)$cluster_leiden <- cluster_leiden
plotReducedDim(sce,
dimred = "UMAP_fastMNNCorrected",
point_size=1/2,
colour_by="cluster_leiden") +
scale_color_manual(values=dayjob::paletteDiscrete(values=unique(sce$cluster_leiden), set="bear"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

ggsave("../plots/231115_umapSuperCentenarians_firstclsutering.pdf", width=12, height=12)
## annotate
cellTypeLabels <- rep(NA, length=length(cluster_leiden))
cellTypeLabels[cluster_leiden ==1] <- "NK"
cellTypeLabels[cluster_leiden ==2] <- "T-cell"
cellTypeLabels[cluster_leiden ==3] <- "CD14 Monocyte"
cellTypeLabels[cluster_leiden ==4] <- "CD16 Monocyte"
cellTypeLabels[cluster_leiden ==5] <- "T-cell"
cellTypeLabels[cluster_leiden ==6] <- "B-cell"
cellTypeLabels[cluster_leiden ==7] <- "CD14 Monocyte"
cellTypeLabels[cluster_leiden ==8] <- "T-cell"
cellTypeLabels[cluster_leiden ==9] <- "unassigned"
cellTypeLabels[cluster_leiden ==10] <- "unassigned"
cellTypeLabels[cluster_leiden ==11] <- "Megakaryocyte"
cellTypeLabels[cluster_leiden ==12] <- "Erythrocyte"
cellTypeLabels[cluster_leiden ==13] <- "unassigned" # only one cell
sce$cellType <- cellTypeLabels
sce <- sce[,!sce$cellType == "unassigned"]
plotReducedDim(sce,
dimred = "UMAP_fastMNNCorrected",
point_size=1/2,
colour_by="cellType") +
scale_color_manual(values=dayjob::paletteDiscrete(values=unique(sce$cellType), set="bear"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Differential abundance on cell type level
library(dayjob)
cellCountsLong <- getCellPopulationCounts(sce,
patientVar = "sample",
cellTypeVar = "cellType",
group = "condition",
format="long")
## Loading required package: tidyverse
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ✔ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::collapse() masks IRanges::collapse()
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count() masks matrixStats::count()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks S4Vectors::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce() masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ dplyr::slice() masks IRanges::slice()
cellCountsWide <- getCellPopulationCounts(sce,
patientVar = "sample",
cellTypeVar = "cellType",
group = "condition",
format="wide")
countMatrix <- cellCountsWide[,-c(1:2)]
countMatrix <- t(as.matrix(countMatrix))
cellCountsLong <- cellCountsLong %>%
group_by(patient, group) %>%
mutate(totalCells = sum(nCells),
nCellTypes = n(),
geoMean = prod(nCells+1)^(1/nCellTypes),
CLR = log((nCells+1) / geoMean )) %>%
ungroup() %>%
mutate(fractionCells = nCells / totalCells)
ggplot(cellCountsLong, aes(x=celltype, y=fractionCells, fill=group)) +
geom_boxplot(outlier.size = 0) +
geom_point(position = position_dodge(width = .75)) +
theme_classic()

ggplot(cellCountsLong, aes(x=celltype, y=CLR, fill=group)) +
geom_boxplot(outlier.size = 0) +
geom_point(position = position_dodge(width = .75)) +
theme_classic()

voomCLR
library(limma)
##
## Attaching package: 'limma'
## The following object is masked from 'package:scater':
##
## plotMDS
## The following object is masked from 'package:BiocGenerics':
##
## plotMA
library(voomCLR)
design <- model.matrix( ~ group, data=cellCountsWide)
v <- voomCLR(counts = countMatrix,
design = design)
fit <- lmFit(v, design)
plotBeta(fit)


fit <- applyBiasCorrection(fit)
fit <- eBayes(fit)
tt <- topTable(fit, coef=2, n=Inf)
tt
## logFC AveExpr t P.Value adj.P.Val
## B-cell -1.82591773 0.4269248 -4.7719184 9.658185e-06 0.0000676073
## Megakaryocyte -1.74369386 -3.6121741 -2.9092873 4.852084e-03 0.0169822935
## CD16 Monocyte -0.48557325 -0.5068662 -1.0101589 3.158983e-01 0.5528219425
## Erythrocyte 0.32720225 -2.0724494 0.5465086 5.864542e-01 0.6841965488
## NK 0.40359641 1.6286420 1.1838294 2.404851e-01 0.5528219425
## CD14 Monocyte -0.09978169 1.4810835 -0.3173037 7.519576e-01 0.7519576434
## T-cell -0.13732434 2.6548395 -0.5481404 5.853393e-01 0.6841965488
## B
## B-cell 3.219499
## Megakaryocyte -2.266429
## CD16 Monocyte -5.831231
## Erythrocyte -6.017585
## NK -6.040879
## CD14 Monocyte -6.675412
## T-cell -6.815885
NB-GLM
library(glmmTMB)
## Warning in checkMatrixPackageVersion(): Package version inconsistency detected.
## TMB was built with Matrix version 1.4.1
## Current Matrix version is 1.5.1
## Please re-install 'TMB' from source using install.packages('TMB', type = 'source') or ask CRAN for a binary version of 'TMB' matching CRAN's 'Matrix' package
## Warning in checkDepPackageVersion(dep_pkg = "TMB"): Package version inconsistency detected.
## glmmTMB was built with TMB version 1.9.0
## Current TMB version is 1.9.1
## Please re-install glmmTMB from source or restore original 'TMB' package (see '?reinstalling' for more information)
resDfNB <- data.frame(population=rownames(countMatrix),
diff=rep(NA,nrow(countMatrix)),
se=rep(NA,nrow(countMatrix)),
pval=rep(NA,nrow(countMatrix)))
for(pp in 1:nrow(countMatrix)){
curY <- countMatrix[pp,]
m_p <- glmmTMB(curY ~ -1 + design,
family=nbinom2(link="log"),
offset = log(colSums(countMatrix)))
resDfNB[pp,2:4] <- c(summary(m_p)$coef$cond["designgroupSC",c(1,2,4)])
}
## Warning in glmmTMB(curY ~ -1 + design, family = nbinom2(link = "log"), offset =
## log(colSums(countMatrix))): use of the 'data' argument is recommended
## Warning in glmmTMB(curY ~ -1 + design, family = nbinom2(link = "log"), offset =
## log(colSums(countMatrix))): use of the 'data' argument is recommended
## Warning in glmmTMB(curY ~ -1 + design, family = nbinom2(link = "log"), offset =
## log(colSums(countMatrix))): use of the 'data' argument is recommended
## Warning in glmmTMB(curY ~ -1 + design, family = nbinom2(link = "log"), offset =
## log(colSums(countMatrix))): use of the 'data' argument is recommended
## Warning in glmmTMB(curY ~ -1 + design, family = nbinom2(link = "log"), offset =
## log(colSums(countMatrix))): use of the 'data' argument is recommended
## Warning in glmmTMB(curY ~ -1 + design, family = nbinom2(link = "log"), offset =
## log(colSums(countMatrix))): use of the 'data' argument is recommended
## Warning in glmmTMB(curY ~ -1 + design, family = nbinom2(link = "log"), offset =
## log(colSums(countMatrix))): use of the 'data' argument is recommended
resDfNB$padj <- p.adjust(resDfNB$pval, "fdr")
resDfNB[order(abs(resDfNB$pval)),]
## population diff se pval padj
## 1 B-cell -1.487083994 0.3376134 1.059331e-05 7.415315e-05
## 5 Megakaryocyte -1.340590157 0.5577411 1.623423e-02 5.681982e-02
## 4 Erythrocyte 1.000885355 0.6247627 1.091501e-01 2.546836e-01
## 6 NK 0.423289496 0.3529722 2.304445e-01 4.032778e-01
## 3 CD16 Monocyte -0.275485966 0.3269730 3.994889e-01 4.898243e-01
## 2 CD14 Monocyte 0.166801239 0.2067743 4.198494e-01 4.898243e-01
## 7 T-cell 0.005603286 0.1412258 9.683514e-01 9.683514e-01
edgeR
library(edgeR)
##
## Attaching package: 'edgeR'
## The following object is masked from 'package:SingleCellExperiment':
##
## cpm
d <- DGEList(countMatrix)
d <- calcNormFactors(d)
d <- estimateDisp(d, design)
fit <- glmFit(d, design)
lrt <- glmLRT(fit, coef=2)
lrt$table$padj <- p.adjust(lrt$table$PValue, method="BH")
lrt$table[order(lrt$table$LR, decreasing=TRUE),]
## logFC logCPM LR PValue padj
## B-cell -2.21817387 16.37263 16.034447094 6.220045e-05 0.0004354031
## Megakaryocyte -1.92182993 10.88345 5.918772905 1.498039e-02 0.0524313589
## Erythrocyte 1.46652866 13.19023 2.346807326 1.255399e-01 0.2929264022
## CD16 Monocyte -0.52063402 14.63788 0.871850660 3.504435e-01 0.6129077775
## NK 0.44714056 17.83270 0.507842587 4.760743e-01 0.6129077775
## CD14 Monocyte 0.24692029 17.30961 0.403378642 5.253495e-01 0.6129077775
## T-cell -0.01303389 18.91406 0.002409311 9.608518e-01 0.9608517682
LinDA
library(MicrobiomeStat)
library(phyloseq)
##
## Attaching package: 'phyloseq'
## The following object is masked from 'package:SummarizedExperiment':
##
## distance
## The following object is masked from 'package:Biobase':
##
## sampleNames
## The following object is masked from 'package:GenomicRanges':
##
## distance
## The following object is masked from 'package:IRanges':
##
## distance
lindaRes <- LinDA::linda(otu.tab = countMatrix, # rows features, cols samples
meta = as.data.frame(cellCountsWide),
formula = '~ group',
type = 'count',
adaptive=TRUE,
imputation = FALSE)
## Imputation approach is used.
lindaRes$output$groupSC[order(abs(lindaRes$output$groupSC$stat), decreasing=TRUE),]
## baseMean log2FoldChange lfcSE stat pvalue
## B-cell 150488.159 -2.64906617 0.5257893 -5.0382653 0.0005079197
## Megakaryocyte 2379.046 -2.62658353 0.7513855 -3.4956535 0.0057682458
## CD16 Monocyte 26655.514 -0.69089188 0.6666986 -1.0362882 0.3244768990
## NK 134889.885 0.59965476 0.6346155 0.9449104 0.3669753458
## T-cell 528302.252 -0.23711169 0.3320906 -0.7139969 0.4915579437
## Erythrocyte 3315.389 0.43444042 1.1260370 0.3858136 0.7077152944
## CD14 Monocyte 153969.756 -0.09222699 0.3090940 -0.2983785 0.7715221240
## padj reject df
## B-cell 0.003555438 TRUE 10
## Megakaryocyte 0.020188860 TRUE 10
## CD16 Monocyte 0.642206855 FALSE 10
## NK 0.642206855 FALSE 10
## T-cell 0.688181121 FALSE 10
## Erythrocyte 0.771522124 FALSE 10
## CD14 Monocyte 0.771522124 FALSE 10
Summary of results
allPopulations <- rownames(countMatrix)
nPopulations <- nrow(countMatrix)
allMethods <- c("voomCLR", "LinDA", "edgeR", "NBGLM")
nMethods <- length(allMethods)
resDfAll <- data.frame(population = rep(allPopulations, nMethods),
method = rep(allMethods, each=nPopulations),
log2FC = c(tt[allPopulations,]$logFC,
lindaRes$output$groupSC[allPopulations,]$log2FoldChange,
lrt$table[allPopulations,]$logFC,
log2(exp(resDfNB$diff))),
padj = c(tt[allPopulations,]$adj.P.Val,
lindaRes$output$groupSC[allPopulations,]$padj,
lrt$table[allPopulations,]$padj,
resDfNB$padj)
)
resDfAll$DA <- resDfAll$padj <= 0.05
pComp <- ggplot(resDfAll,
aes(x=population, y=method, color=log2FC, size=DA)) +
geom_point()+
facet_grid(~population, scales = "free_x", space = "free")+
theme_light()+
scale_color_gradient2(low= "blue", mid="gray", high= "red") +
theme(axis.text.x = element_text(angle=45, hjust = 1, vjust = 1),
strip.text.x = element_text(size=14, angle=90, hjust = 1, vjust = 1))
pComp
## Warning: Using size for a discrete variable is not advised.

resDfAll$DA <- resDfAll$padj <= 0.06 # to show NB models are also at adjusted p-value around .05
pComp <- ggplot(resDfAll,
aes(x=population, y=method, color=log2FC, size=DA)) +
geom_point()+
facet_grid(~population, scales = "free_x", space = "free")+
theme_light()+
scale_color_gradient2(low= "blue", mid="gray", high= "red") +
theme(axis.text.x = element_text(angle=45, hjust = 1, vjust = 1),
strip.text.x = element_text(size=14, angle=90, hjust = 1, vjust = 1))
pComp
## Warning: Using size for a discrete variable is not advised.
